import os

import networkx as nx
import numpy as np
import pandas as pd


def stationsInLine(station_file='data/test_station.csv'):
    """
    Print the station table and the stations on each line.

    Reads a GBK-encoded CSV, converts ``line_name`` values such as
    ``'1号线'`` to integers, sorts the rows by line number and then by
    ``sequence``, prints the sorted table, and finally prints one row per
    line: the line number followed by the values of its ``station_name``
    column in sequence order.

    :param station_file: path of the station CSV; it must provide the
        ``line_name``, ``sequence`` and ``station_name`` columns.
    :return: None
    """
    station_data = pd.read_csv(station_file, encoding='gbk')
    # NOTE: str.rstrip treats its argument as a *set* of characters, not a
    # suffix. That is safe here because digits are never in that set, so only
    # the trailing '号'/'线' characters are removed before the integer cast.
    station_data['line_name'] = station_data['line_name'].str.rstrip('号线').astype(np.int64)
    # Integer sort so that line 10 comes after line 2 (not between 1 and 2).
    station_data = station_data.sort_values(by=['line_name', 'sequence'], ascending=True)
    print(station_data)
    # The frame is already ordered by line, so sort=False keeps that order
    # and a single groupby pass replaces one full boolean scan per line.
    for line, stations in station_data.groupby('line_name', sort=False)['station_name']:
        print(line, stations.values)


def matchOD(trips_path='data/trips.csv', output_path='data/intermediate/OD-matching.csv'):
    """
    Count trips per hour and origin-destination pair and save them as CSV.

    Reads the columns at positions 1-3 of the trips CSV (which must be the
    ``进站时间``, ``进站名称`` and ``出站名称`` columns), discards trips whose
    entry and exit station are equal, joins entry and exit station into an
    ``'entry-exit'`` key (``进出站``), truncates the entry time to the hour
    and counts the rows of every (hour, key) combination. The counts are
    written to ``output_path`` in a column named ``次数``.

    :param trips_path: path of the input trips CSV.
    :param output_path: path of the CSV to write; missing parent
        directories are created.
    :return: None
    """
    trips = pd.read_csv(trips_path, usecols=[1, 2, 3])
    trips['进站时间'] = pd.to_datetime(trips['进站时间'])
    trips = trips.set_index('进站时间')
    # Explicit copy: the filtered frame gets a new column below, and writing
    # into a slice of another frame raises SettingWithCopyWarning.
    trips = trips.loc[trips['进站名称'] != trips['出站名称']].copy()
    trips['进出站'] = trips['进站名称'].str.cat(trips['出站名称'], sep='-')
    # Bucket entry times by hour; the index keeps its name, so it can be
    # used as a groupby key alongside the OD column.
    trips.index = trips.index.to_period('h')
    # size() only counts rows per key, so the station columns need not be
    # dropped beforehand.
    OD = trips.groupby(by=['进站时间', '进出站']).size().sort_index(axis=0)
    OD.name = '次数'
    # to_csv does not create directories; make sure the target one exists.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    OD.to_csv(output_path)
